import requests

# Chicago traffic-congestion estimates (city open-data portal), CSV export.
file_url = 'https://data.cityofchicago.org/api/views/kf7e-cur8/rows.csv?accessType=DOWNLOAD'

# Stream the download so the large file is written chunk-by-chunk instead of
# being buffered whole in memory; `timeout` prevents hanging on a dead
# connection, and the `with` block guarantees the connection is released.
with requests.get(file_url, stream=True, timeout=60) as r:
    r.raise_for_status()  # fail loudly on HTTP errors instead of saving an error page
    with open("chicago.csv", "wb") as file:
        for block in r.iter_content(chunk_size=1024):
            if block:  # skip keep-alive chunks
                file.write(block)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns  # advanced visualization library
import warnings

# Silence library deprecation chatter in the notebook output.
warnings.filterwarnings('ignore')

# Load the CSV downloaded above.  (The duplicate `import pandas as pd`
# that used to sit here was removed — pandas is already imported.)
data = pd.read_csv('chicago.csv')
TIME : Timestamp of the record
REGION_ID : Unique arbitrary number to represent each region.
SPEED : Estimated congestion level. Although expressed in miles per hour, this value is more a reflection of the congestion level in the region than it is indicative of the average raw speed vehicles are travelling within the region.
REGION : Name of the region.
BUS_COUNT : The number of buses used to estimate traffic.
NUM_READS : Number of GPS probes received(or used) for estimating the speed for that segment.
WEST: Approximate longitude of the west edge of the region.
EAST: Approximate longitude of the east edge of the region.
SOUTH : Approximate latitude of the south edge of the region.
NORTH : Approximate latitude of the north edge of the region.
NW_LOCATION : The location corresponding to the intersection of NORTH and WEST in a format that allows for creation of maps and other geographic operations on this data portal.
SE_LOCATION : The location corresponding to the intersection of SOUTH and EAST in a format that allows for creation of maps and other geographic operations on this data portal.
# Parse the raw timestamp strings (e.g. "03/18/2019 05:00:00 PM").
data["TIME"] = pd.to_datetime(data["TIME"], format="%m/%d/%Y %I:%M:%S %p")

# Drop rows whose congestion estimate is exactly zero, then keep daytime
# readings only (hours 7-20; HOUR comes straight from the raw CSV).
night_hours = [21, 22, 23, 0, 1, 2, 3, 4, 5, 6]
data = data[~data["SPEED"].isin([0])]
data = data[~data["HOUR"].isin(night_hours)]

# Split the timestamp into calendar components for later grouping.
for part in ("day", "month", "year"):
    data[part.upper()] = getattr(data["TIME"].dt, part)
Build a list of the regions (needed later for the maps).
# Region names indexed by REGION_ID - 1; region IDs run from 1 to 29.
list_REGION = [
    data.loc[data['REGION_ID'] == region_id, 'REGION'].unique()[0]
    for region_id in range(1, 30)
]
# Collapse multiple readings within the same hour into one averaged row per
# (region, hour) cell; the coordinate columns are constant per region and are
# kept in the key so they survive the aggregation.
data = data.groupby(['REGION_ID','MONTH','DAY','YEAR','HOUR','NORTH','WEST','EAST', 'SOUTH','DAY_OF_WEEK'])[['SPEED','BUS_COUNT','NUM_READS']].agg('mean').reset_index()

data["SPEEDKM"] = data["SPEED"] * 1.609  # mph -> km/h

# Rebuild a full timestamp from the grouped calendar components (minute fixed to 00).
data['MINUTE'] = '00'
data['Time'] = pd.to_datetime(
    data[['YEAR','MONTH','DAY','HOUR','MINUTE']].astype(str).agg('-'.join, axis=1),
    format='%Y-%m-%d-%H-%M')

# Geographic centre of each rectangular region — used as the heat-map point.
data['CENTER_LAT'] = data['NORTH'] * 0.5 + 0.5 * data['SOUTH']
data['CENTER_LON'] = data['EAST'] * 0.5 + 0.5 * data['WEST']

# Human-readable label for the HeatMapWithTime slider.
# FIX: the original used "%l" (space-padded 12-hour), a glibc-only extension
# that is not portable (e.g. fails on Windows); "%I" is the standard directive.
data['Time'] = data.Time.dt.strftime("%a, %d %b, %Y at %I:%M %p")
data.head()
Some things to know about Folium.
import folium
from folium import plugins
from folium.plugins import HeatMapWithTime
def generateBaseMap(default_location=[40.693943, -73.985880], default_zoom_start=10):
    """Return a folium Map centred on *default_location* with a scale control."""
    return folium.Map(
        location=default_location,
        control_scale=True,
        zoom_start=default_zoom_start,
    )
# Base map centred on downtown Chicago.  Renamed from `map` so the Python
# builtin `map` is not shadowed (the variable is never referenced afterwards).
chicago_map = generateBaseMap([41.881832, -87.623177])

# Heat-map weight: invert speed so SLOWER traffic (more congestion) is HOTTER.
data['speed'] = 1 / data['SPEED']
We can also animate our heat maps to change the data being shown on it based on certain dimensions (hour,month) using class method called HeatMapWithTime.
The SPEED in the 29 regions for a whole week.
I chose the week of the 17th to the 23rd of March 2019 (the range actually selected by the filter below).
# One week of data: 17-23 March 2019.
sub_set = data[(data['YEAR'] == 2019)
               & (data['MONTH'] == 3)
               & data['DAY'].between(17, 23)].copy()
# Guard against infinite weights (1/SPEED) before feeding the heat map.
sub_set = sub_set.replace([np.inf, -np.inf], 0)

# One [lat, lon, weight] frame per timestamp, in first-appearance order.
l = [
    sub_set.loc[sub_set['Time'] == t, ['CENTER_LAT', 'CENTER_LON', 'speed']].values.tolist()
    for t in sub_set.Time.unique()
]
List of the coordinates of the 29 regions.
# [lat, lon] for each of the 29 region centres, taken from the first frame
# (region order is identical in every frame).
l1 = [point[0:2] for point in l[0][:29]]
# Render the animated SPEED heat map for the selected week and save it.
base_map = generateBaseMap([41.881832, -87.623177])

# One heat-map frame per timestamp; `index` supplies the slider labels and
# must match the frame order in `l`.
HeatMapWithTime(l, index=sub_set.Time.unique().tolist(), radius=40,
                gradient={0.2: 'blue', 0.4: 'lime', 0.6: 'orange', 1: 'red'},
                min_opacity=0.5, max_opacity=0.8,
                use_local_extrema=True).add_to(base_map)

# One toggleable marker sub-layer per region, labelled with the region name.
fg = folium.FeatureGroup(name='The 29 regions of Chicago')
base_map.add_child(fg)
for i in range(29):
    subgroup = plugins.FeatureGroupSubGroup(fg, str(list_REGION[i]))
    base_map.add_child(subgroup)
    folium.Marker(l1[i]).add_to(subgroup)
folium.LayerControl(collapsed=False).add_to(base_map)
base_map  # display inline (notebook cell output)

# FIX: `os.path.join` with a single argument was a no-op, so it and the
# redundant per-cell `import os` were removed.
base_map.save('SPEED.html')
The BUS_COUNT in the 29 regions for a whole week (Monday -> Sunday).
I chose the week of the 21st to the 27th of October 2019.
# One week of data: 21-27 October 2019.
sub_set = data[(data['YEAR'] == 2019)
               & (data['MONTH'] == 10)
               & data['DAY'].between(21, 27)].copy()

# One [lat, lon, BUS_COUNT] frame per timestamp, in first-appearance order.
l = [
    sub_set.loc[sub_set['Time'] == t, ['CENTER_LAT', 'CENTER_LON', 'BUS_COUNT']].values.tolist()
    for t in sub_set.Time.unique()
]
# Render the animated BUS_COUNT heat map for the selected week and save it.
base_map = generateBaseMap([41.881832, -87.623177])

# One heat-map frame per timestamp; `index` supplies the slider labels and
# must match the frame order in `l`.
HeatMapWithTime(l, index=sub_set.Time.unique().tolist(), radius=40,
                gradient={0.2: 'blue', 0.4: 'lime', 0.6: 'orange', 1: 'red'},
                min_opacity=0.5, max_opacity=0.8,
                use_local_extrema=True).add_to(base_map)

# One toggleable marker sub-layer per region, labelled with the region name.
fg = folium.FeatureGroup(name='The 29 regions of Chicago')
base_map.add_child(fg)
for i in range(29):
    subgroup = plugins.FeatureGroupSubGroup(fg, str(list_REGION[i]))
    base_map.add_child(subgroup)
    folium.Marker(l1[i]).add_to(subgroup)
folium.LayerControl(collapsed=False).add_to(base_map)
base_map  # display inline (notebook cell output)

# FIX: `os.path.join` with a single argument was a no-op, so it and the
# redundant per-cell `import os` were removed.
base_map.save('BUS_COUNT.html')
In this part I am only going to focus on Sundays' traffic of 2018 & 2019.
# All Sundays (per the text above, DAY_OF_WEEK == 1) of 2018 and 2019.
sub_set = data[data['YEAR'].isin([2018, 2019])
               & (data['DAY_OF_WEEK'] == 1)].copy()

# One [lat, lon, weight] frame per timestamp, in chronological order.
l = [
    sub_set.loc[sub_set['Time'] == t, ['CENTER_LAT', 'CENTER_LON', 'speed']].values.tolist()
    for t in sub_set.Time.sort_values().unique()
]
# Render the animated SPEED heat map over all Sundays and save it.
base_map = generateBaseMap([41.881832, -87.623177])

# One heat-map frame per timestamp; `index` is sorted chronologically to
# match the frame order in `l`.
HeatMapWithTime(l, index=sub_set.Time.sort_values().unique().tolist(), radius=40,
                gradient={0.2: 'blue', 0.4: 'lime', 0.6: 'orange', 1: 'red'},
                min_opacity=0.5, max_opacity=0.8,
                use_local_extrema=True).add_to(base_map)

# One toggleable marker sub-layer per region, labelled with the region name.
fg = folium.FeatureGroup(name='The 29 regions of Chicago')
base_map.add_child(fg)
for i in range(29):
    subgroup = plugins.FeatureGroupSubGroup(fg, str(list_REGION[i]))
    base_map.add_child(subgroup)
    folium.Marker(l1[i]).add_to(subgroup)
folium.LayerControl(collapsed=False).add_to(base_map)
base_map  # display inline (notebook cell output)

# FIX: `os.path.join` with a single argument was a no-op, so it and the
# redundant per-cell `import os` were removed.
base_map.save('SPEED_SUNDAYS.html')
In this part I am only going to focus on Mondays' traffic of 2018 & 2019.
# All Mondays (per the text above, DAY_OF_WEEK == 2) of 2018 and 2019.
sub_set = data[data['YEAR'].isin([2018, 2019])
               & (data['DAY_OF_WEEK'] == 2)].copy()

# One [lat, lon, weight] frame per timestamp, in chronological order.
l = [
    sub_set.loc[sub_set['Time'] == t, ['CENTER_LAT', 'CENTER_LON', 'speed']].values.tolist()
    for t in sub_set.Time.sort_values().unique()
]
# Render the animated SPEED heat map over all Mondays and save it.
base_map = generateBaseMap([41.881832, -87.623177])

# One heat-map frame per timestamp; `index` is sorted chronologically to
# match the frame order in `l`.
HeatMapWithTime(l, index=sub_set.Time.sort_values().unique().tolist(), radius=40,
                gradient={0.2: 'blue', 0.4: 'lime', 0.6: 'orange', 1: 'red'},
                min_opacity=0.5, max_opacity=0.8,
                use_local_extrema=True).add_to(base_map)

# One toggleable marker sub-layer per region, labelled with the region name.
fg = folium.FeatureGroup(name='The 29 regions of Chicago')
base_map.add_child(fg)
for i in range(29):
    subgroup = plugins.FeatureGroupSubGroup(fg, str(list_REGION[i]))
    base_map.add_child(subgroup)
    folium.Marker(l1[i]).add_to(subgroup)
folium.LayerControl(collapsed=False).add_to(base_map)
base_map  # display inline (notebook cell output)

# FIX: `os.path.join` with a single argument was a no-op, so it and the
# redundant per-cell `import os` were removed.
base_map.save('SPEED_MONDAYS.html')
The traffic on Christmas Eve (i.e. the 24th of December 2019) starting from 5 PM, and on Christmas Day (the 25th of December).
# December 2019 only, then keep Christmas Eve from 5 PM onward plus all of
# Christmas Day.
sub_set = data[(data['YEAR'] == 2019) & (data['MONTH'] == 12)].copy()
eve_evening = (sub_set['DAY'] == 24) & (sub_set['HOUR'] >= 17)
christmas_day = sub_set['DAY'] == 25
sub_set = sub_set[eve_evening | christmas_day]

# One [lat, lon, weight] frame per timestamp, in chronological order.
l = [
    sub_set.loc[sub_set['Time'] == t, ['CENTER_LAT', 'CENTER_LON', 'speed']].values.tolist()
    for t in sub_set.Time.sort_values().unique()
]
# Render the animated SPEED heat map over the Christmas period and save it.
base_map = generateBaseMap([41.881832, -87.623177])

# One heat-map frame per timestamp; `index` is sorted chronologically to
# match the frame order in `l`.
HeatMapWithTime(l, index=sub_set.Time.sort_values().unique().tolist(), radius=40,
                gradient={0.2: 'blue', 0.4: 'lime', 0.6: 'orange', 1: 'red'},
                min_opacity=0.5, max_opacity=0.8,
                use_local_extrema=True).add_to(base_map)

# One toggleable marker sub-layer per region, labelled with the region name.
fg = folium.FeatureGroup(name='The 29 regions of Chicago')
base_map.add_child(fg)
for i in range(29):
    subgroup = plugins.FeatureGroupSubGroup(fg, str(list_REGION[i]))
    base_map.add_child(subgroup)
    folium.Marker(l1[i]).add_to(subgroup)
folium.LayerControl(collapsed=False).add_to(base_map)
base_map  # display inline (notebook cell output)

# FIX: `os.path.join` with a single argument was a no-op, so it and the
# redundant per-cell `import os` were removed.
base_map.save('christmas.html')
The traffic during rush hours (8 AM and 5 PM) on Mondays (2018 & 2019).
# Mondays of 2018-2019, rush hours only (8 AM and 5 PM).
sub_set = data[data['YEAR'].isin([2018, 2019])
               & (data['DAY_OF_WEEK'] == 2)
               & data['HOUR'].isin([8, 17])].copy()

# One [lat, lon, weight] frame per timestamp, in chronological order.
l = [
    sub_set.loc[sub_set['Time'] == t, ['CENTER_LAT', 'CENTER_LON', 'speed']].values.tolist()
    for t in sub_set.Time.sort_values().unique()
]
# Render the animated SPEED heat map for Monday rush hours and save it.
base_map = generateBaseMap([41.881832, -87.623177])

# One heat-map frame per timestamp; `index` is sorted chronologically to
# match the frame order in `l`.
HeatMapWithTime(l, index=sub_set.Time.sort_values().unique().tolist(), radius=40,
                gradient={0.2: 'blue', 0.4: 'lime', 0.6: 'orange', 1: 'red'},
                min_opacity=0.5, max_opacity=0.8,
                use_local_extrema=True).add_to(base_map)

# One toggleable marker sub-layer per region, labelled with the region name.
fg = folium.FeatureGroup(name='The 29 regions of Chicago')
base_map.add_child(fg)
for i in range(29):
    subgroup = plugins.FeatureGroupSubGroup(fg, str(list_REGION[i]))
    base_map.add_child(subgroup)
    folium.Marker(l1[i]).add_to(subgroup)
folium.LayerControl(collapsed=False).add_to(base_map)
base_map  # display inline (notebook cell output)

# FIX: `os.path.join` with a single argument was a no-op, so it and the
# redundant per-cell `import os` were removed.
base_map.save('MONDAYS_RUSH_HOURS.html')